{
  "cells": [
    {
      "cell_type": "markdown",
      "source": [
        "# Feature Selection for Machine Learning"
      ],
      "metadata": {}
    },
    {
      "cell_type": "markdown",
      "source": [
        "Feature selection is a removing unnecessary features."
      ],
      "metadata": {}
    },
    {
      "cell_type": "code",
      "source": [
        "# Importing the libraries\n",
        "import pandas as pd\n",
        "import numpy as np\n",
        "import matplotlib.pyplot as plt\n",
        "\n",
        "import warnings\n",
        "warnings.filterwarnings(\"ignore\")\n",
        "\n",
        "# fix_yahoo_finance is used to fetch data \n",
        "import fix_yahoo_finance as yf\n",
        "yf.pdr_override()"
      ],
      "outputs": [],
      "execution_count": 1,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# input\n",
        "symbol = 'AMD'\n",
        "start = '2014-01-01'\n",
        "end = '2018-08-27'\n",
        "\n",
        "# Read data \n",
        "dataset = yf.download(symbol,start,end)\n",
        "\n",
        "# View columns \n",
        "dataset.head()"
      ],
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "[*********************100%***********************]  1 of 1 downloaded\n"
          ]
        },
        {
          "output_type": "execute_result",
          "execution_count": 2,
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>Open</th>\n",
              "      <th>High</th>\n",
              "      <th>Low</th>\n",
              "      <th>Close</th>\n",
              "      <th>Adj Close</th>\n",
              "      <th>Volume</th>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>Date</th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>2014-01-02</th>\n",
              "      <td>3.85</td>\n",
              "      <td>3.98</td>\n",
              "      <td>3.84</td>\n",
              "      <td>3.95</td>\n",
              "      <td>3.95</td>\n",
              "      <td>20548400</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2014-01-03</th>\n",
              "      <td>3.98</td>\n",
              "      <td>4.00</td>\n",
              "      <td>3.88</td>\n",
              "      <td>4.00</td>\n",
              "      <td>4.00</td>\n",
              "      <td>22887200</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2014-01-06</th>\n",
              "      <td>4.01</td>\n",
              "      <td>4.18</td>\n",
              "      <td>3.99</td>\n",
              "      <td>4.13</td>\n",
              "      <td>4.13</td>\n",
              "      <td>42398300</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2014-01-07</th>\n",
              "      <td>4.19</td>\n",
              "      <td>4.25</td>\n",
              "      <td>4.11</td>\n",
              "      <td>4.18</td>\n",
              "      <td>4.18</td>\n",
              "      <td>42932100</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2014-01-08</th>\n",
              "      <td>4.23</td>\n",
              "      <td>4.26</td>\n",
              "      <td>4.14</td>\n",
              "      <td>4.18</td>\n",
              "      <td>4.18</td>\n",
              "      <td>30678700</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "            Open  High   Low  Close  Adj Close    Volume\n",
              "Date                                                    \n",
              "2014-01-02  3.85  3.98  3.84   3.95       3.95  20548400\n",
              "2014-01-03  3.98  4.00  3.88   4.00       4.00  22887200\n",
              "2014-01-06  4.01  4.18  3.99   4.13       4.13  42398300\n",
              "2014-01-07  4.19  4.25  4.11   4.18       4.18  42932100\n",
              "2014-01-08  4.23  4.26  4.14   4.18       4.18  30678700"
            ]
          },
          "metadata": {}
        }
      ],
      "execution_count": 2,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "dataset['Increase_Decrease'] = np.where(dataset['Volume'].shift(-1) > dataset['Volume'],1,0)\n",
        "dataset['Buy_Sell_on_Open'] = np.where(dataset['Open'].shift(-1) > dataset['Open'],1,0)\n",
        "dataset['Buy_Sell'] = np.where(dataset['Adj Close'].shift(-1) > dataset['Adj Close'],1,0)\n",
        "dataset['Returns'] = dataset['Adj Close'].pct_change()\n",
        "dataset = dataset.dropna()\n",
        "dataset.head()"
      ],
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 3,
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>Open</th>\n",
              "      <th>High</th>\n",
              "      <th>Low</th>\n",
              "      <th>Close</th>\n",
              "      <th>Adj Close</th>\n",
              "      <th>Volume</th>\n",
              "      <th>Increase_Decrease</th>\n",
              "      <th>Buy_Sell_on_Open</th>\n",
              "      <th>Buy_Sell</th>\n",
              "      <th>Returns</th>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>Date</th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>2014-01-03</th>\n",
              "      <td>3.98</td>\n",
              "      <td>4.00</td>\n",
              "      <td>3.88</td>\n",
              "      <td>4.00</td>\n",
              "      <td>4.00</td>\n",
              "      <td>22887200</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "      <td>0.012658</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2014-01-06</th>\n",
              "      <td>4.01</td>\n",
              "      <td>4.18</td>\n",
              "      <td>3.99</td>\n",
              "      <td>4.13</td>\n",
              "      <td>4.13</td>\n",
              "      <td>42398300</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "      <td>0.032500</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2014-01-07</th>\n",
              "      <td>4.19</td>\n",
              "      <td>4.25</td>\n",
              "      <td>4.11</td>\n",
              "      <td>4.18</td>\n",
              "      <td>4.18</td>\n",
              "      <td>42932100</td>\n",
              "      <td>0</td>\n",
              "      <td>1</td>\n",
              "      <td>0</td>\n",
              "      <td>0.012107</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2014-01-08</th>\n",
              "      <td>4.23</td>\n",
              "      <td>4.26</td>\n",
              "      <td>4.14</td>\n",
              "      <td>4.18</td>\n",
              "      <td>4.18</td>\n",
              "      <td>30678700</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>0.000000</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2014-01-09</th>\n",
              "      <td>4.20</td>\n",
              "      <td>4.23</td>\n",
              "      <td>4.05</td>\n",
              "      <td>4.09</td>\n",
              "      <td>4.09</td>\n",
              "      <td>30667600</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>1</td>\n",
              "      <td>-0.021531</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "            Open  High   Low  Close  Adj Close    Volume  Increase_Decrease  \\\n",
              "Date                                                                          \n",
              "2014-01-03  3.98  4.00  3.88   4.00       4.00  22887200                  1   \n",
              "2014-01-06  4.01  4.18  3.99   4.13       4.13  42398300                  1   \n",
              "2014-01-07  4.19  4.25  4.11   4.18       4.18  42932100                  0   \n",
              "2014-01-08  4.23  4.26  4.14   4.18       4.18  30678700                  0   \n",
              "2014-01-09  4.20  4.23  4.05   4.09       4.09  30667600                  0   \n",
              "\n",
              "            Buy_Sell_on_Open  Buy_Sell   Returns  \n",
              "Date                                              \n",
              "2014-01-03                 1         1  0.012658  \n",
              "2014-01-06                 1         1  0.032500  \n",
              "2014-01-07                 1         0  0.012107  \n",
              "2014-01-08                 0         0  0.000000  \n",
              "2014-01-09                 0         1 -0.021531  "
            ]
          },
          "metadata": {}
        }
      ],
      "execution_count": 3,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "features = dataset.drop(['Adj Close', 'Close', 'Returns'], axis=1)"
      ],
      "outputs": [],
      "execution_count": 4,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "markdown",
      "source": [
        "Univariate Selection"
      ],
      "metadata": {}
    },
    {
      "cell_type": "code",
      "source": [
        "from sklearn.feature_selection import SelectKBest\n",
        "from sklearn.feature_selection import chi2\n",
        "\n",
        "array = features.values\n",
        "X = array.astype(int)\n",
        "Y = dataset['Adj Close'].values.astype(int)\n",
        "\n",
        "# Feature extraction\n",
        "test = SelectKBest(score_func=chi2, k=3)\n",
        "fit = test.fit(X, Y)\n",
        "\n",
        "# Summarize scores\n",
        "np.set_printoptions(precision=3)\n",
        "print(fit.scores_)"
      ],
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "[4.297e+03 4.378e+03 4.241e+03 6.776e+11 8.232e+00 9.965e+00 1.452e+01]\n"
          ]
        }
      ],
      "execution_count": 5,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "new_features = fit.transform(X)"
      ],
      "outputs": [],
      "execution_count": 6,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Show results\n",
        "print('Original number of features:', X.shape[1])\n",
        "print('Reduced number of features:', new_features.shape[1])"
      ],
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Original number of features: 7\n",
            "Reduced number of features: 3\n"
          ]
        }
      ],
      "execution_count": 7,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Summarize selected features\n",
        "print(new_features[0:5,:])"
      ],
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "[[       3        4 22887200]\n",
            " [       4        4 42398300]\n",
            " [       4        4 42932100]\n",
            " [       4        4 30678700]\n",
            " [       4        4 30667600]]\n"
          ]
        }
      ],
      "execution_count": 8,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "US = pd.DataFrame(fit.scores_, columns = [\"Univariate_Selection\"], index=features.columns)\n",
        "US = US.reset_index()"
      ],
      "outputs": [],
      "execution_count": 9,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "US.sort_values('Univariate_Selection',ascending=0)"
      ],
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 10,
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>index</th>\n",
              "      <th>Univariate_Selection</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>Volume</td>\n",
              "      <td>6.775760e+11</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>High</td>\n",
              "      <td>4.378068e+03</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>Open</td>\n",
              "      <td>4.296580e+03</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>Low</td>\n",
              "      <td>4.240549e+03</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>6</th>\n",
              "      <td>Buy_Sell</td>\n",
              "      <td>1.451569e+01</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>5</th>\n",
              "      <td>Buy_Sell_on_Open</td>\n",
              "      <td>9.964768e+00</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>Increase_Decrease</td>\n",
              "      <td>8.232090e+00</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "               index  Univariate_Selection\n",
              "3             Volume          6.775760e+11\n",
              "1               High          4.378068e+03\n",
              "0               Open          4.296580e+03\n",
              "2                Low          4.240549e+03\n",
              "6           Buy_Sell          1.451569e+01\n",
              "5   Buy_Sell_on_Open          9.964768e+00\n",
              "4  Increase_Decrease          8.232090e+00"
            ]
          },
          "metadata": {}
        }
      ],
      "execution_count": 10,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "markdown",
      "source": [
        "Recursive Feature Elimination "
      ],
      "metadata": {}
    },
    {
      "cell_type": "code",
      "source": [
        "from sklearn.feature_selection import RFE\n",
        "from sklearn.linear_model import LogisticRegression"
      ],
      "outputs": [],
      "execution_count": 11,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Feature extraction\n",
        "model = LogisticRegression()\n",
        "rfe = RFE(model, 3)\n",
        "fit = rfe.fit(X, Y)\n",
        "print(\"Num Features: %s\" % (fit.n_features_))\n",
        "print(\"Selected Features: %s\" % (fit.support_))\n",
        "print(\"Feature Ranking: %s\" % (fit.ranking_))"
      ],
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Num Features: 3\n",
            "Selected Features: [ True  True False  True False False False]\n",
            "Feature Ranking: [1 1 2 1 3 5 4]\n"
          ]
        }
      ],
      "execution_count": 12,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "Selected = pd.DataFrame(rfe.support_, columns = [\"RFE\"], index=features.columns)\n",
        "Selected = Selected.reset_index()"
      ],
      "outputs": [],
      "execution_count": 13,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "Selected[Selected['RFE'] == True]"
      ],
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 14,
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>index</th>\n",
              "      <th>RFE</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>Open</td>\n",
              "      <td>True</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>High</td>\n",
              "      <td>True</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>Volume</td>\n",
              "      <td>True</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "    index   RFE\n",
              "0    Open  True\n",
              "1    High  True\n",
              "3  Volume  True"
            ]
          },
          "metadata": {}
        }
      ],
      "execution_count": 14,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "markdown",
      "source": [
        "Ridge regression "
      ],
      "metadata": {}
    },
    {
      "cell_type": "code",
      "source": [
        "from sklearn.linear_model import Ridge"
      ],
      "outputs": [],
      "execution_count": 15,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "ridge = Ridge(alpha=1.0)\n",
        "ridge.fit(X,Y)"
      ],
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 16,
          "data": {
            "text/plain": [
              "Ridge(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=None,\n",
              "   normalize=False, random_state=None, solver='auto', tol=0.001)"
            ]
          },
          "metadata": {}
        }
      ],
      "execution_count": 16,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "def pretty_print_coefs(coefs, names = None, sort = False):\n",
        "    if names == None:\n",
        "        names = [\"X%s\" % x for x in range(len(coefs))]\n",
        "    lst = zip(coefs, names)\n",
        "    if sort:\n",
        "        lst = sorted(lst,  key = lambda x:-np.abs(x[0]))\n",
        "    return \" + \".join(\"%s * %s\" % (round(coef, 3), name)\n",
        "                                   for coef, name in lst)"
      ],
      "outputs": [],
      "execution_count": 17,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "print (\"Ridge model:\", pretty_print_coefs(ridge.coef_))"
      ],
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Ridge model: 0.021 * X0 + 0.497 * X1 + 0.481 * X2 + 0.0 * X3 + 0.019 * X4 + 0.137 * X5 + -0.001 * X6\n"
          ]
        }
      ],
      "execution_count": 18,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "markdown",
      "source": [
        "Principal Component Analysis"
      ],
      "metadata": {}
    },
    {
      "cell_type": "code",
      "source": [
        "from sklearn.decomposition import PCA"
      ],
      "outputs": [],
      "execution_count": 19,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# feature extraction\n",
        "pca = PCA(n_components=3)\n",
        "fit = pca.fit(X)\n",
        "# summarize components\n",
        "print((\"Explained Variance: %s\") % fit.explained_variance_ratio_)\n",
        "print(fit.components_)"
      ],
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Explained Variance: [1.000e+00 3.343e-14 2.281e-16]\n",
            "[[ 9.465e-08  9.808e-08  9.156e-08  1.000e+00 -2.834e-09  6.781e-10\n",
            "   3.577e-10]\n",
            " [-5.777e-01 -5.813e-01 -5.729e-01  1.641e-07 -1.595e-02 -8.321e-04\n",
            "  -3.761e-03]\n",
            " [ 6.768e-02 -2.406e-02 -3.216e-02 -1.013e-09 -2.190e-01 -6.765e-01\n",
            "  -6.988e-01]]\n"
          ]
        }
      ],
      "execution_count": 20,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "markdown",
      "source": [
        "Feature Importance"
      ],
      "metadata": {}
    },
    {
      "cell_type": "code",
      "source": [
        "from sklearn.ensemble import ExtraTreesClassifier"
      ],
      "outputs": [],
      "execution_count": 21,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# feature extraction\n",
        "model = ExtraTreesClassifier()\n",
        "model.fit(X, Y)\n",
        "print(model.feature_importances_)"
      ],
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "[0.216 0.328 0.321 0.099 0.013 0.015 0.009]\n"
          ]
        }
      ],
      "execution_count": 22,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "ET = pd.DataFrame(model.feature_importances_, columns = [\"Extra Trees\"], index=features.columns)"
      ],
      "outputs": [],
      "execution_count": 23,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "ET = ET.reset_index()\n",
        "ET.sort_values(['Extra Trees'],ascending=0)"
      ],
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 24,
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>index</th>\n",
              "      <th>Extra Trees</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>High</td>\n",
              "      <td>0.328005</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>Low</td>\n",
              "      <td>0.320661</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>Open</td>\n",
              "      <td>0.215932</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>Volume</td>\n",
              "      <td>0.098947</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>5</th>\n",
              "      <td>Buy_Sell_on_Open</td>\n",
              "      <td>0.014538</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>Increase_Decrease</td>\n",
              "      <td>0.012885</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>6</th>\n",
              "      <td>Buy_Sell</td>\n",
              "      <td>0.009032</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "               index  Extra Trees\n",
              "1               High     0.328005\n",
              "2                Low     0.320661\n",
              "0               Open     0.215932\n",
              "3             Volume     0.098947\n",
              "5   Buy_Sell_on_Open     0.014538\n",
              "4  Increase_Decrease     0.012885\n",
              "6           Buy_Sell     0.009032"
            ]
          },
          "metadata": {}
        }
      ],
      "execution_count": 24,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "markdown",
      "source": [
        "Random Forest Classifier"
      ],
      "metadata": {}
    },
    {
      "cell_type": "code",
      "source": [
        "from sklearn.ensemble import RandomForestClassifier\n",
        "clf = RandomForestClassifier()\n",
        "\nclf.fit(X,Y)"
      ],
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 25,
          "data": {
            "text/plain": [
              "RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',\n",
              "            max_depth=None, max_features='auto', max_leaf_nodes=None,\n",
              "            min_impurity_decrease=0.0, min_impurity_split=None,\n",
              "            min_samples_leaf=1, min_samples_split=2,\n",
              "            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,\n",
              "            oob_score=False, random_state=None, verbose=0,\n",
              "            warm_start=False)"
            ]
          },
          "metadata": {}
        }
      ],
      "execution_count": 25,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "RFC = pd.DataFrame(clf.feature_importances_, columns = [\"RFC\"], index=features.columns)"
      ],
      "outputs": [],
      "execution_count": 26,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "RFC = RFC.reset_index()"
      ],
      "outputs": [],
      "execution_count": 27,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "RFC.sort_values(['RFC'],ascending=0)"
      ],
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 28,
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>index</th>\n",
              "      <th>RFC</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>Low</td>\n",
              "      <td>0.348548</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>Open</td>\n",
              "      <td>0.247975</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>High</td>\n",
              "      <td>0.226145</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>Volume</td>\n",
              "      <td>0.126685</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>5</th>\n",
              "      <td>Buy_Sell_on_Open</td>\n",
              "      <td>0.019749</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>6</th>\n",
              "      <td>Buy_Sell</td>\n",
              "      <td>0.016977</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>Increase_Decrease</td>\n",
              "      <td>0.013921</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "               index       RFC\n",
              "2                Low  0.348548\n",
              "0               Open  0.247975\n",
              "1               High  0.226145\n",
              "3             Volume  0.126685\n",
              "5   Buy_Sell_on_Open  0.019749\n",
              "6           Buy_Sell  0.016977\n",
              "4  Increase_Decrease  0.013921"
            ]
          },
          "metadata": {}
        }
      ],
      "execution_count": 28,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "markdown",
      "source": [
        "Chi Square on Features "
      ],
      "metadata": {}
    },
    {
      "cell_type": "code",
      "source": [
        "from sklearn.feature_selection import SelectKBest\n",
        "from sklearn.feature_selection import chi2\n",
        "\n",
        "model = SelectKBest(score_func=chi2, k=5)\n",
        "fit = model.fit(X, Y)"
      ],
      "outputs": [],
      "execution_count": 29,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "print(fit.scores_)"
      ],
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "[4.297e+03 4.378e+03 4.241e+03 6.776e+11 8.232e+00 9.965e+00 1.452e+01]\n"
          ]
        }
      ],
      "execution_count": 30,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "chi_sq = pd.DataFrame(fit.scores_, columns = [\"Chi_Square\"], index=features.columns)"
      ],
      "outputs": [],
      "execution_count": 31,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "chi_sq = chi_sq.reset_index()"
      ],
      "outputs": [],
      "execution_count": 32,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "chi_sq.sort_values('Chi_Square',ascending=0)"
      ],
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 33,
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>index</th>\n",
              "      <th>Chi_Square</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>Volume</td>\n",
              "      <td>6.775760e+11</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>High</td>\n",
              "      <td>4.378068e+03</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>Open</td>\n",
              "      <td>4.296580e+03</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>Low</td>\n",
              "      <td>4.240549e+03</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>6</th>\n",
              "      <td>Buy_Sell</td>\n",
              "      <td>1.451569e+01</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>5</th>\n",
              "      <td>Buy_Sell_on_Open</td>\n",
              "      <td>9.964768e+00</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>Increase_Decrease</td>\n",
              "      <td>8.232090e+00</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "               index    Chi_Square\n",
              "3             Volume  6.775760e+11\n",
              "1               High  4.378068e+03\n",
              "0               Open  4.296580e+03\n",
              "2                Low  4.240549e+03\n",
              "6           Buy_Sell  1.451569e+01\n",
              "5   Buy_Sell_on_Open  9.964768e+00\n",
              "4  Increase_Decrease  8.232090e+00"
            ]
          },
          "metadata": {}
        }
      ],
      "execution_count": 33,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "markdown",
      "source": [
        "L1 Feature Selection"
      ],
      "metadata": {}
    },
    {
      "cell_type": "code",
      "source": [
        "from sklearn.svm import LinearSVC\n",
        "from sklearn.feature_selection import SelectFromModel"
      ],
      "outputs": [],
      "execution_count": 34,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "lsvc = LinearSVC(C=0.01, penalty=\"l1\", dual=False).fit(X, Y)\n",
        "model = SelectFromModel(lsvc,prefit=True)"
      ],
      "outputs": [],
      "execution_count": 35,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "l1 = pd.DataFrame(model.get_support(), columns = [\"L1\"], index=features.columns)"
      ],
      "outputs": [],
      "execution_count": 36,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "l1 = l1.reset_index()"
      ],
      "outputs": [],
      "execution_count": 37,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "l1[l1['L1'] == True]"
      ],
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 38,
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>index</th>\n",
              "      <th>L1</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>Open</td>\n",
              "      <td>True</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>High</td>\n",
              "      <td>True</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>Low</td>\n",
              "      <td>True</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "  index    L1\n",
              "0  Open  True\n",
              "1  High  True\n",
              "2   Low  True"
            ]
          },
          "metadata": {}
        }
      ],
      "execution_count": 38,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "markdown",
      "source": [
        "Multicollinearity Variance Inflation factor"
      ],
      "metadata": {}
    },
    {
      "cell_type": "code",
      "source": [
        "from statsmodels.stats.outliers_influence import variance_inflation_factor"
      ],
      "outputs": [],
      "execution_count": 40,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "def calculate_vif(features):\n",
        "    vif = pd.DataFrame()\n",
        "    vif[\"Features\"] = features.columns\n",
        "    vif[\"VIF\"] = [variance_inflation_factor(features.values, i) for i in range(features.shape[1])]    \n",
        "    return(vif)"
      ],
      "outputs": [],
      "execution_count": 41,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "vif = calculate_vif(features)\n",
        "while vif['VIF'][vif['VIF'] > 10].any():\n",
        "    remove = vif.sort_values('VIF',ascending=0)['Features'][:1]\n",
        "    features.drop(remove,axis=1,inplace=True)\n",
        "    vif = calculate_vif(features)"
      ],
      "outputs": [],
      "execution_count": 42,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "vif"
      ],
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 43,
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>Features</th>\n",
              "      <th>VIF</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>Low</td>\n",
              "      <td>5.213371</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>Volume</td>\n",
              "      <td>4.093310</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>Increase_Decrease</td>\n",
              "      <td>1.695261</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>Buy_Sell_on_Open</td>\n",
              "      <td>1.754247</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>Buy_Sell</td>\n",
              "      <td>1.793754</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "            Features       VIF\n",
              "0                Low  5.213371\n",
              "1             Volume  4.093310\n",
              "2  Increase_Decrease  1.695261\n",
              "3   Buy_Sell_on_Open  1.754247\n",
              "4           Buy_Sell  1.793754"
            ]
          },
          "metadata": {}
        }
      ],
      "execution_count": 43,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "from functools import reduce\n",
        "dfs = [US, RFE, ET, RFC, chi_sq, l1, vif]\n",
        "final_results = reduce(lambda left,right: pd.merge(left,right,on='index'), dfs)"
      ],
      "outputs": [
        {
          "output_type": "error",
          "ename": "ValueError",
          "evalue": "can not merge DataFrame with instance of type <class 'abc.ABCMeta'>",
          "traceback": [
            "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
            "\u001b[1;31mValueError\u001b[0m                                Traceback (most recent call last)",
            "\u001b[1;32m<ipython-input-48-a19680e20fd0>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m      1\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[0mfunctools\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mreduce\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      2\u001b[0m \u001b[0mdfs\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m[\u001b[0m\u001b[0mUS\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mRFE\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mET\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mRFC\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mchi_sq\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0ml1\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvif\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 3\u001b[1;33m \u001b[0mfinal_results\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mreduce\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;32mlambda\u001b[0m \u001b[0mleft\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mright\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mpd\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmerge\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mleft\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mright\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mon\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m'index'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdfs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
            "\u001b[1;32m<ipython-input-48-a19680e20fd0>\u001b[0m in \u001b[0;36m<lambda>\u001b[1;34m(left, right)\u001b[0m\n\u001b[0;32m      1\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[0mfunctools\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mreduce\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      2\u001b[0m \u001b[0mdfs\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m[\u001b[0m\u001b[0mUS\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mRFE\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mET\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mRFC\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mchi_sq\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0ml1\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvif\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 3\u001b[1;33m \u001b[0mfinal_results\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mreduce\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;32mlambda\u001b[0m \u001b[0mleft\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mright\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mpd\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmerge\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mleft\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mright\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mon\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m'index'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdfs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
            "\u001b[1;32m~\\Anaconda3\\envs\\py35\\lib\\site-packages\\pandas\\core\\reshape\\merge.py\u001b[0m in \u001b[0;36mmerge\u001b[1;34m(left, right, how, on, left_on, right_on, left_index, right_index, sort, suffixes, copy, indicator, validate)\u001b[0m\n\u001b[0;32m     59\u001b[0m                          \u001b[0mright_index\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mright_index\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0msort\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0msort\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0msuffixes\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0msuffixes\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     60\u001b[0m                          \u001b[0mcopy\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mcopy\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mindicator\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mindicator\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 61\u001b[1;33m                          validate=validate)\n\u001b[0m\u001b[0;32m     62\u001b[0m     \u001b[1;32mreturn\u001b[0m \u001b[0mop\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_result\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     63\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
            "\u001b[1;32m~\\Anaconda3\\envs\\py35\\lib\\site-packages\\pandas\\core\\reshape\\merge.py\u001b[0m in \u001b[0;36m__init__\u001b[1;34m(self, left, right, how, on, left_on, right_on, axis, left_index, right_index, sort, suffixes, copy, indicator, validate)\u001b[0m\n\u001b[0;32m    525\u001b[0m         \u001b[1;32mif\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0misinstance\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mright\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mDataFrame\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    526\u001b[0m             raise ValueError('can not merge DataFrame with instance of '\n\u001b[1;32m--> 527\u001b[1;33m                              'type {right}'.format(right=type(right)))\n\u001b[0m\u001b[0;32m    528\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    529\u001b[0m         \u001b[1;32mif\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0mis_bool\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mleft_index\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
            "\u001b[1;31mValueError\u001b[0m: can not merge DataFrame with instance of type <class 'abc.ABCMeta'>"
          ]
        }
      ],
      "execution_count": 48,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    }
  ],
  "metadata": {
    "kernel_info": {
      "name": "python3"
    },
    "language_info": {
      "nbconvert_exporter": "python",
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "mimetype": "text/x-python",
      "file_extension": ".py",
      "name": "python",
      "pygments_lexer": "ipython3",
      "version": "3.5.5"
    },
    "kernelspec": {
      "name": "python3",
      "language": "python",
      "display_name": "Python 3"
    },
    "nteract": {
      "version": "0.12.2"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 4
}